import requests
from lxml import html


def spider(sn, goods_list, page_index=6, timeout=10):
    """Scrape one page of Dangdang search results into ``goods_list``.

    The function requests the search page, walks every result ``<li>`` under
    ``div#search_nature_rg`` and, for each one, prints the extracted fields
    and appends a dict to ``goods_list``.

    Args:
        sn: Search keyword sent as the ``key`` query parameter.
        goods_list: List that is mutated in place; one dict is appended per
            result item.
        page_index: Result page to request. Defaults to 6, the value that
            used to be hard-coded in the URL.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises.

    Returns:
        None. Results are delivered through ``goods_list``. Each appended
        dict has the keys ``'title'``, ``'price'`` and ``'href'``:

        * ``'title'`` is the anchor's ``title`` attribute, or the placeholder
          ``"无商品名信息"`` when it is missing or empty.
        * ``'price'`` is the raw text of the price node (currency sign
          included), or ``None`` when the item has no price node.
        * ``'href'`` is the anchor's ``href`` attribute, or ``None`` when it
          is missing.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the response does not arrive within ``timeout`` seconds.
    """
    # Let requests build the query string so keywords containing spaces, '&',
    # '#' or non-ASCII characters are percent-encoded instead of corrupting
    # the URL.
    response = requests.get(
        'http://search.dangdang.com/',
        params={'key': sn, 'act': 'input', 'page_index': page_index},
        timeout=timeout,
    )

    # Parse the page and select every result item.
    selector = html.fromstring(response.text)
    items = selector.xpath('//div[@id="search_nature_rg"]/ul/li')
    print(len(items))

    for li in items:
        # Title: fall back to the placeholder when the attribute is absent
        # or empty, rather than indexing an empty result list.
        titles = li.xpath('a/@title')
        title = titles[0] if titles and titles[0] else "无商品名信息"
        print(title)

        # Purchase link.
        links = li.xpath('a/@href')
        print(links)
        href = links[0] if links else None

        # Price: the stored value keeps the raw node text; only the printed
        # form has the currency sign stripped.
        prices = li.xpath(
            'p[@class="price"]/span[@class = "search_now_price"]/text()'
        )
        if prices:
            price = prices[0]
            print(price.replace('¥', ''))
        else:
            price = None
            print('搜索不到价格')

        # Seller: items without a seller node are labelled with the default.
        stores = li.xpath('p[@class="search_shangjia"]/a/text()')
        store = stores[0] if stores else '当当自营'
        print(store)

        # Record the item; missing fields no longer raise IndexError.
        goods_list.append({
            'title': title,
            'price': price,
            'href': href,
        })


if __name__ == '__main__':
    # Demo run with a sample 13-digit search key. spider() prints what it
    # finds, so the collected list is not used any further here.
    sn = '9787020002207'
    results = []
    spider(sn, results)
